geom_hline(aes(yintercept=0), color="red", linetype="dashed") +
ylab("Susceptibility to Automation (Arntz et al.)") + xlab("Occupations (ISCO88) by Task Group, Weighted by Share") +
theme(axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
legend.position ="bottom",
legend.title=element_blank())
# Draw the scatter plot without a legend for the point-size aesthetic.
# guides(size = FALSE) is deprecated in current ggplot2; "none" is the
# supported spelling and suppresses the same legend.
plot_t3risk + guides(size = "none")
# Order the x-axis factor by task group (task3); the level values are taken
# from column 1 of task3arntz (presumably isco2d -- confirm).
task3arntz$xlabel <- factor(
  task3arntz$isco2d,
  levels = task3arntz[order(task3arntz$task3), 1]
)
head(task3arntz)
# Arntz et al ----
# Read the automation-risk file (one score per country and 2-digit ISCO-08).
arntz_full <- read.dta13(paste0(path, "2_shhighrisk_cntry_isco.dta"))

# create crosswalk isco08 to isco88, 2-digit
iscocw <- read.csv(paste0(path, "correspondence.csv"), stringsAsFactors = FALSE)
# Codes 110/210/310 are recoded to 10 so the two-character prefix below is "10".
iscocw$isco08[iscocw$isco08 %in% c(110, 210, 310)] <- 10
iscocw$isco082d <- substr(iscocw$isco08, 1, 2)
iscocw$isco88[iscocw$isco88 %in% 110] <- 10
iscocw$isco882d <- substr(iscocw$isco88, 1, 2)

# Count how often each (ISCO-08, ISCO-88) two-digit pair occurs.
iscocw <- iscocw %>%
  dplyr::select(isco082d, isco882d) %>%
  mutate(one = 1) %>%
  group_by(isco082d, isco882d) %>%
  mutate(nr_08_per_88 = sum(one)) %>%
  ungroup()

# One ISCO-88 target per ISCO-08 group: keep the most frequent pairing and,
# among the rows that remain, the first one.
iscocw2d <- iscocw %>%
  group_by(isco082d) %>%
  filter(nr_08_per_88 == max(nr_08_per_88)) %>%
  filter(row_number(nr_08_per_88) == 1) %>%
  ungroup() %>%
  dplyr::select(isco082d, isco882d)

# prepare arntz data
# pool data across relevant countries (weighted by obs)
arntz_pool <- arntz_full %>%
  filter(country %in% c("Germany", "United Kingdom")) %>%
  group_by(isco08dig2) %>%
  summarise(arntz_score = weighted.mean(sh_highrisk, obs)) %>%
  ungroup() %>%
  dplyr::rename(isco082d = isco08dig2)

# Translate the ISCO-08 key into the ISCO-88 key used elsewhere as isco2d.
arntz <- merge(arntz_pool, iscocw2d, by = "isco082d") %>%
  dplyr::select(isco2d = isco882d, arntz_score)
View(arntz)
# Arntz et al ----
# Re-run of the crosswalk construction, inspecting the intermediate tables
# before and after the merge.
arntz_full <- read.dta13(paste0(path, "2_shhighrisk_cntry_isco.dta"))

# create crosswalk isco08 to isco88, 2-digit
iscocw <- read.csv(paste0(path, "correspondence.csv"), stringsAsFactors = FALSE)
iscocw$isco08[iscocw$isco08 %in% c(110, 210, 310)] <- 10
iscocw$isco082d <- substr(iscocw$isco08, 1, 2)
iscocw$isco88[iscocw$isco88 %in% 110] <- 10
iscocw$isco882d <- substr(iscocw$isco88, 1, 2)

# Frequency of every two-digit (ISCO-08, ISCO-88) pair.
iscocw <- iscocw %>%
  dplyr::select(isco082d, isco882d) %>%
  mutate(one = 1) %>%
  group_by(isco082d, isco882d) %>%
  mutate(nr_08_per_88 = sum(one)) %>%
  ungroup()

# Most frequent ISCO-88 partner per ISCO-08 group, first row on ties.
iscocw2d <- iscocw %>%
  group_by(isco082d) %>%
  filter(nr_08_per_88 == max(nr_08_per_88)) %>%
  filter(row_number(nr_08_per_88) == 1) %>%
  ungroup() %>%
  dplyr::select(isco082d, isco882d)

# prepare arntz data
# pool data across relevant countries
arntz_pool <- arntz_full %>%
  filter(country %in% c("Germany", "United Kingdom")) %>%
  group_by(isco08dig2) %>%
  summarise(arntz_score = weighted.mean(sh_highrisk, obs)) %>%
  ungroup() %>%
  dplyr::rename(isco082d = isco08dig2)

# Inspect both merge inputs (opened twice, as in the recorded session).
View(arntz_pool)
View(iscocw2d)
View(arntz_pool)
View(iscocw2d)
arntz <- merge(x = arntz_pool, y = iscocw2d, by = "isco082d")
View(arntz)
# Arntz et al ----
# Automation-risk scores (Arntz et al.) mapped from 2-digit ISCO-08 to
# 2-digit ISCO-88 and collapsed to one score per ISCO-88 group.
arntz_full <- read.dta13(paste0(path, "2_shhighrisk_cntry_isco.dta"))

# create crosswalk isco08 to isco88, 2-digit
iscocw <- read.csv(paste0(path, "correspondence.csv"), stringsAsFactors = FALSE)
# Recode 110/210/310 to 10 so that substr(., 1, 2) yields "10" for them.
iscocw$isco08[iscocw$isco08 == 110 | iscocw$isco08 == 210 | iscocw$isco08 == 310] <- 10
iscocw$isco082d <- substr(iscocw$isco08, 1, 2)
iscocw$isco88[iscocw$isco88 == 110] <- 10
iscocw$isco882d <- substr(iscocw$isco88, 1, 2)
iscocw <- iscocw %>% dplyr::select(isco082d, isco882d)
# nr_08_per_88: number of rows sharing the same two-digit pair.
iscocw <- iscocw %>%
  mutate(one = 1) %>%
  group_by(isco082d, isco882d) %>%
  mutate(nr_08_per_88 = sum(one)) %>%
  ungroup()
# Keep the most frequent ISCO-88 match per ISCO-08 group (first row on ties).
iscocw2d <- iscocw %>%
  group_by(isco082d) %>%
  filter(nr_08_per_88 == max(nr_08_per_88)) %>%
  filter(row_number(nr_08_per_88) == 1) %>%
  dplyr::select(isco082d, isco882d) %>%
  ungroup()

# prepare arntz data
# pool data across relevant countries
arntz_pool <- arntz_full %>%
  filter(country == "Germany" | country == "United Kingdom") %>%
  group_by(isco08dig2) %>%
  summarise(arntz_score = weighted.mean(sh_highrisk, obs)) %>%
  ungroup() %>%
  dplyr::rename(isco082d = isco08dig2)

# First pass: one row per ISCO-08 group, keyed by its ISCO-88 match.
arntz <- merge(arntz_pool, iscocw2d, by = "isco082d")
arntz <- arntz %>%
  dplyr::select(isco882d, arntz_score) %>%
  dplyr::rename(isco2d = isco882d)
View(arntz)

# Second pass: collapse to one row per isco2d by averaging the scores
# (duplicates presumably arise when several ISCO-08 groups share one
# ISCO-88 target -- confirm). TRUE is spelled out because T can be reassigned.
arntz <- merge(arntz_pool, iscocw2d, by = "isco082d")
arntz <- arntz %>%
  dplyr::select(isco882d, arntz_score) %>%
  dplyr::rename(isco2d = isco882d) %>%
  group_by(isco2d) %>%
  summarise(arntz_score = mean(arntz_score, na.rm = TRUE))
View(arntz)
# Full outer merge of the task-group crosswalk with the automation scores.
task3arntz <- merge(cw_task3_isco, arntz, by = "isco2d", all = TRUE)

# increase distance between task group with missing value analogously isco=10
placeholder <- c(50, 3, 0.4, NA, NA)
task3arntz <- task3arntz %>%
  rbind(placeholder) %>%
  arrange(isco2d)

# x-axis factor: occupations ordered by task group.
task3arntz$xlabel <- factor(
  task3arntz$isco2d,
  levels = task3arntz[order(task3arntz$task3), 1]
)

# Human-readable task-group label; codes other than 1/2/3 stay NA.
task_labels <- c("Non-Routine Cognitive", "Routine", "Non-Routine Manual")
task3arntz$legend <- factor(
  task_labels[match(task3arntz$task3, 1:3)],
  levels = task_labels
)

# Drop occupations without a task group.
task3arntz <- task3arntz %>% filter(!is.na(task3))
# plotting
# Box plot of the automation score by task group. The zero line is mapped via
# aes() on purpose: a mapped yintercept takes part in y-scale training, so the
# axis always extends to 0.
ggplot(task3arntz, aes(x = legend, y = arntz_score)) +
  geom_hline(aes(yintercept = 0), color = "red", linetype = "dashed") +
  geom_boxplot() +
  ylab("Susceptibility to Automation (Arntz et al.)") +
  xlab("Boxplots by Task Group")

# Scatter plot: one point per occupation, ordered by task group, point size
# proportional to `weight`, labelled with the ISCO code.
plot_t3risk <- ggplot(
  task3arntz,
  aes(
    x = xlabel, y = arntz_score,
    group = factor(legend), shape = factor(legend), color = factor(legend)
  )
) +
  # x/y/group/shape/color are inherited from ggplot(); only size is added here.
  geom_point(aes(size = weight)) +
  geom_text(aes(label = isco2d), vjust = 2, color = "black") +
  scale_colour_grey() +
  geom_hline(aes(yintercept = 0), color = "red", linetype = "dashed") +
  ylab("Susceptibility to Automation (Arntz et al.)") +
  xlab("Occupations (ISCO88) by Task Group, Weighted by Share") +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "bottom",
    legend.title = element_blank()
  )
# guides(size = FALSE) is deprecated; "none" hides the size legend.
plot_t3risk + guides(size = "none")
# Automation-risk scores pooled over ALL countries in the file (no country
# filter in this variant), mapped to 2-digit ISCO-88.
arntz_full <- read.dta13(paste0(path, "2_shhighrisk_cntry_isco.dta"))

# create crosswalk isco08 to isco88, 2-digit
iscocw <- read.csv(paste0(path, "correspondence.csv"), stringsAsFactors = FALSE)
# Recode 110/210/310 to 10 so that substr(., 1, 2) yields "10" for them.
iscocw$isco08[iscocw$isco08 == 110 | iscocw$isco08 == 210 | iscocw$isco08 == 310] <- 10
iscocw$isco082d <- substr(iscocw$isco08, 1, 2)
iscocw$isco88[iscocw$isco88 == 110] <- 10
iscocw$isco882d <- substr(iscocw$isco88, 1, 2)
iscocw <- iscocw %>% dplyr::select(isco082d, isco882d)
# nr_08_per_88: number of rows sharing the same two-digit pair.
iscocw <- iscocw %>%
  mutate(one = 1) %>%
  group_by(isco082d, isco882d) %>%
  mutate(nr_08_per_88 = sum(one)) %>%
  ungroup()
# Keep the most frequent ISCO-88 match per ISCO-08 group (first row on ties).
iscocw2d <- iscocw %>%
  group_by(isco082d) %>%
  filter(nr_08_per_88 == max(nr_08_per_88)) %>%
  filter(row_number(nr_08_per_88) == 1) %>%
  dplyr::select(isco082d, isco882d) %>%
  ungroup()

# prepare arntz data
# pool data across every country present in arntz_full, weighted by obs
arntz_pool <- arntz_full %>%
  group_by(isco08dig2) %>%
  summarise(arntz_score = weighted.mean(sh_highrisk, obs)) %>%
  ungroup() %>%
  dplyr::rename(isco082d = isco08dig2)

# Map to ISCO-88 and average scores that land on the same isco2d.
# TRUE is spelled out because T can be reassigned.
arntz <- merge(arntz_pool, iscocw2d, by = "isco082d")
arntz <- arntz %>%
  dplyr::select(isco882d, arntz_score) %>%
  dplyr::rename(isco2d = isco882d) %>%
  group_by(isco2d) %>%
  summarise(arntz_score = mean(arntz_score, na.rm = TRUE))
# merge
# Outer join keeps occupations present in only one of the two tables.
task3arntz <- merge(cw_task3_isco, arntz, by = "isco2d", all = TRUE)

# increase distance between task group with missing value analogously isco=10
placeholder <- c(50, 3, 0.4, NA, NA)
task3arntz <- task3arntz %>%
  rbind(placeholder) %>%
  arrange(isco2d)

# Occupations ordered by task group for the x axis.
task3arntz$xlabel <- factor(
  task3arntz$isco2d,
  levels = task3arntz[order(task3arntz$task3), 1]
)

# Map task3 codes 1/2/3 to labels; anything else stays NA.
task_labels <- c("Non-Routine Cognitive", "Routine", "Non-Routine Manual")
task3arntz$legend <- factor(
  task_labels[match(task3arntz$task3, 1:3)],
  levels = task_labels
)

# Rows without a task group are removed before plotting.
task3arntz <- task3arntz %>% filter(!is.na(task3))
# plotting
# Box plot of the automation score by task group. yintercept stays inside
# aes() so that 0 is included when the y scale is trained.
ggplot(task3arntz, aes(x = legend, y = arntz_score)) +
  geom_hline(aes(yintercept = 0), color = "red", linetype = "dashed") +
  geom_boxplot() +
  ylab("Susceptibility to Automation (Arntz et al.)") +
  xlab("Boxplots by Task Group")

# Scatter plot of occupations ordered by task group; point size follows
# `weight`, text labels show the ISCO code.
plot_t3risk <- ggplot(
  task3arntz,
  aes(
    x = xlabel, y = arntz_score,
    group = factor(legend), shape = factor(legend), color = factor(legend)
  )
) +
  # All other aesthetics are inherited from the ggplot() call.
  geom_point(aes(size = weight)) +
  geom_text(aes(label = isco2d), vjust = 2, color = "black") +
  scale_colour_grey() +
  geom_hline(aes(yintercept = 0), color = "red", linetype = "dashed") +
  ylab("Susceptibility to Automation (Arntz et al.)") +
  xlab("Occupations (ISCO88) by Task Group, Weighted by Share") +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "bottom",
    legend.title = element_blank()
  )
# "none" replaces the deprecated guides(size = FALSE).
plot_t3risk + guides(size = "none")
# Arntz et al ----
# Final variant: scores pooled over all countries in the file, mapped from
# 2-digit ISCO-08 to 2-digit ISCO-88, one row per ISCO-88 group.
arntz_full <- read.dta13(paste0(path, "2_shhighrisk_cntry_isco.dta"))

# create crosswalk isco08 to isco88, 2-digit
iscocw <- read.csv(paste0(path, "correspondence.csv"), stringsAsFactors = FALSE)
# Recode 110/210/310 to 10 so that substr(., 1, 2) yields "10" for them.
iscocw$isco08[iscocw$isco08 == 110 | iscocw$isco08 == 210 | iscocw$isco08 == 310] <- 10
iscocw$isco082d <- substr(iscocw$isco08, 1, 2)
iscocw$isco88[iscocw$isco88 == 110] <- 10
iscocw$isco882d <- substr(iscocw$isco88, 1, 2)
iscocw <- iscocw %>% dplyr::select(isco082d, isco882d)
# nr_08_per_88: number of rows sharing the same two-digit pair.
iscocw <- iscocw %>%
  mutate(one = 1) %>%
  group_by(isco082d, isco882d) %>%
  mutate(nr_08_per_88 = sum(one)) %>%
  ungroup()
# Keep the most frequent ISCO-88 match per ISCO-08 group (first row on ties).
iscocw2d <- iscocw %>%
  group_by(isco082d) %>%
  filter(nr_08_per_88 == max(nr_08_per_88)) %>%
  filter(row_number(nr_08_per_88) == 1) %>%
  dplyr::select(isco082d, isco882d) %>%
  ungroup()

# prepare arntz data
# pool data across every country present in arntz_full, weighted by obs
arntz_pool <- arntz_full %>%
  group_by(isco08dig2) %>%
  summarise(arntz_score = weighted.mean(sh_highrisk, obs)) %>%
  ungroup() %>%
  dplyr::rename(isco082d = isco08dig2)

# Map to ISCO-88 and average scores that land on the same isco2d.
# TRUE is spelled out because T can be reassigned.
arntz <- merge(arntz_pool, iscocw2d, by = "isco082d")
arntz <- arntz %>%
  dplyr::select(isco882d, arntz_score) %>%
  dplyr::rename(isco2d = isco882d) %>%
  group_by(isco2d) %>%
  summarise(arntz_score = mean(arntz_score, na.rm = TRUE))
# merge
# Combine task groups and automation scores; unmatched rows on either side
# are retained (all = TRUE).
task3arntz <- merge(cw_task3_isco, arntz, by = "isco2d", all = TRUE)

# increase distance between task group with missing value analogously isco=10
placeholder <- c(50, 3, 0.4, NA, NA)
task3arntz <- task3arntz %>%
  rbind(placeholder) %>%
  arrange(isco2d)

# Factor whose level order follows the task group, used as x position.
task3arntz$xlabel <- factor(
  task3arntz$isco2d,
  levels = task3arntz[order(task3arntz$task3), 1]
)

# Label lookup for task3 codes 1, 2, 3 (other values give NA).
task_labels <- c("Non-Routine Cognitive", "Routine", "Non-Routine Manual")
task3arntz$legend <- factor(
  task_labels[match(task3arntz$task3, 1:3)],
  levels = task_labels
)

# Keep only rows that carry a task group.
task3arntz <- task3arntz %>% filter(!is.na(task3))
# plotting
# Box plot of the automation score by task group. The plot is stored so it
# can be handed to ggsave() explicitly instead of relying on last_plot().
plot_t3box <- ggplot(task3arntz, aes(x = legend, y = arntz_score)) +
  geom_hline(aes(yintercept = 0), color = "red", linetype = "dashed") +
  geom_boxplot() +
  ylab("Susceptibility to Automation (Arntz et al.)") +
  xlab("Boxplots by Task Group")
plot_t3box
ggsave(paste0(outpath, "task_arntz_boxplot.eps"), plot = plot_t3box, height = 8, width = 8)

# scatter
# One point per occupation, ordered by task group; size follows `weight`.
plot_t3risk <- ggplot(
  task3arntz,
  aes(
    x = xlabel, y = arntz_score,
    group = factor(legend), shape = factor(legend), color = factor(legend)
  )
) +
  # Remaining aesthetics are inherited from the ggplot() call.
  geom_point(aes(size = weight)) +
  geom_text(aes(label = isco2d), vjust = 2, color = "black") +
  scale_colour_grey() +
  geom_hline(aes(yintercept = 0), color = "red", linetype = "dashed") +
  ylab("Susceptibility to Automation (Arntz et al.)") +
  xlab("Occupations (ISCO88) by Task Group, Weighted by Share") +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "bottom",
    legend.title = element_blank()
  )
# "none" replaces the deprecated guides(size = FALSE); the same object is
# both shown and written to disk.
plot_t3risk + guides(size = "none")
ggsave(
  paste0(outpath, "task_arntz_scatter.eps"),
  plot = plot_t3risk + guides(size = "none"),
  height = 8, width = 8
)
task3arntz
# Create and Analyze Pooled Panels
# created: tk, Tue Nov  5 13:51:39 2019 ---
# Table 1
# Table 2
# Figure 2
# Table 3
# update:
# Preparation
# NOTE(review): rm(list = ls()) wipes the caller's workspace; kept for
# backward compatibility, but running in a fresh R session is preferable.
rm(list = ls())
# dev.off() raises an error when no graphics device is open, so a device is
# only closed if one exists.
if (!is.null(dev.list())) dev.off()
cat("\014") # form-feed character; clears the console in RStudio
# globals
options(scipen = 999) # strongly prefer fixed over scientific notation
# packages
# requireNamespace() only checks availability; require() would also attach
# the package and hide a failed load behind a FALSE return value.
if (!requireNamespace("pacman", quietly = TRUE)) install.packages("pacman")
pacman::p_load(tidyverse, broom, hrbrthemes, plm, estimatr, sandwich, lmtest, AER, lfe, huxtable, margins, readstata13, texreg, reshape2, readxl)
# ggplot theme
theme_set(theme_bw() + theme(text = element_text(size = 20)))
# often used code
# filter(row_number(value) == 1
# which(colnames(df)==varnameinquotes)
# directories
path <- "~/Dropbox/projects/transitionpattern/submission/5_CPS/submission/CPS_revision/data/"
outpath <- "~/Dropbox/projects/transitionpattern/submission/5_CPS/submission/CPS_revision/out/"
# Load the three panel extracts and build per-country subsets (*_pool) with a
# common set of columns. The statements below record several attempts; the
# last assignment of each object is the one that counts.
# convert.factors=FALSE keeps Stata labelled variables as their raw codes.
shp_full <- read.dta13(paste0(path, "shp_CPS.dta"), convert.factors=FALSE)
# NOTE(review): this attempt selects `edu`; the next one switches to `isced`
# right after names(shp_full) is inspected, so this call presumably failed.
shp_pool <- shp_full %>% dplyr::select(idpers, task3, edu, female, incomem)
names(shp_full)
shp_pool <- shp_full %>% dplyr::select(idpers, task3, isced, female, incomem)
soep_full <- read.dta13(paste0(path, "soep_CPS.dta"), convert.factors=FALSE)
uk_full <- read.dta13(paste0(path, "bhps_CPS.dta"), convert.factors=FALSE)
# NOTE(review): selecting `idpers` from all three data sets; the later switch
# to `id` suggests `idpers` is not present in every file -- confirm.
shp_pool <- shp_full %>% dplyr::select(idpers, task3, isced, female, incomem)
soep_pool <- soep_full %>% dplyr::select(idpers, task3, isced, female, incomem)
uk_pool <- uk_full %>% dplyr::select(idpers, task3, isced, female, incomem)
# Inspect identifiers and column names to find a person id common to all files.
head(soep_full$id)
names(soep_full)
names(uk_full)
# Compare the two id variables in the Swiss data.
cor(shp_full$id, shp_full$idpers)
head(soep_full$id)
head(uk_full$id)
# Use `id` as the person identifier in all three subsets.
shp_pool <- shp_full %>% dplyr::select(id, task3, isced, female, incomem)
soep_pool <- soep_full %>% dplyr::select(id, task3, isced, female, incomem)
uk_pool <- uk_full %>% dplyr::select(id, task3, isced, female, incomem)
# Country tag for the pooled data.
shp_pool$cntry <- "CH"
soep_pool$cntry <- "DE"
uk_pool$cntry <- "UK"
# Compare the education codings across the three data sets.
names(uk_full)
table(uk_full$edu)
table(shp_full$isced)
table(soep_full$isced)
# Copy the UK education variable into `isced` so all three files share the
# column name (the coding itself is not harmonised here).
uk_full$isced <- uk_full$edu
shp_pool <- shp_full %>% dplyr::select(id, task3, isced, female, incomem)
soep_pool <- soep_full %>% dplyr::select(id, task3, isced, female, incomem)
uk_pool <- uk_full %>% dplyr::select(id, task3, isced, female, incomem)
shp_pool$cntry <- "CH"
soep_pool$cntry <- "DE"
uk_pool$cntry <- "UK"
# Mean age by education code in the German data, plus code frequencies.
soep_full %>% group_by(isced) %>% summarise(meanage=mean(age, na.rm=T))
table(soep_full$isced)
table(shp_full$isced)
# Final subsets: same columns as before plus `age`. Re-selecting drops the
# cntry column, so it is assigned again afterwards.
shp_pool <- shp_full %>% dplyr::select(id, task3, isced, female, incomem, age)
soep_pool <- soep_full %>% dplyr::select(id, task3, isced, female, incomem, age)
uk_pool <- uk_full %>% dplyr::select(id, task3, isced, female, incomem, age)
shp_pool$cntry <- "CH"
soep_pool$cntry <- "DE"
uk_pool$cntry <- "UK"
# Low-skill indicator (1 = low skilled, 0 = otherwise, NA = education missing).
# The definition is refined in three steps; the last assignment per data set
# is the one used downstream.
#
# Fixes relative to the recorded session:
#  * prop.table() calls on `shp$full$...` (undefined object) and on raw
#    columns without table() were invalid and are dropped; the valid
#    prop.table(table(...)) call that followed them is kept.
#  * `$task` only worked through partial matching of `task3` on a
#    data.frame; the full column name is now used.

# Step 1: narrow definition.
shp_pool$lowskilled <- ifelse(shp_pool$isced == 0 | shp_pool$isced == 1, 1, 0) # isced==0 or 1; not completed or first stage basic edu (primary)
shp_pool$lowskilled[is.na(shp_pool$isced)] <- NA
soep_pool$lowskilled <- ifelse(soep_pool$isced == 1, 1, 0) # pgisced97==1 -> inadequately trained
soep_pool$lowskilled[is.na(soep_pool$isced)] <- NA
soep_pool$lowskilled[soep_pool$isced == 0] <- NA # in school
uk_pool$lowskilled <- ifelse(uk_pool$isced == 0, 1, 0) # hiqual_dv==9 --> no qualification
uk_pool$lowskilled[is.na(uk_pool$isced)] <- NA

# Cell shares, then row shares (margin 1 = within task group).
prop.table(table(shp_pool$task3, shp_pool$lowskilled))
prop.table(table(shp_pool$task3, shp_pool$lowskilled), 1)
prop.table(table(soep_pool$task3, soep_pool$lowskilled), 1)

# Step 2: wider definition, education code <= 2 in all three data sets.
shp_pool$lowskilled <- ifelse(shp_pool$isced <= 2, 1, 0) # isced==0 or 1; not completed or first stage basic edu (primary); 2=lower secondary
shp_pool$lowskilled[is.na(shp_pool$isced)] <- NA
soep_pool$lowskilled <- ifelse(soep_pool$isced <= 2, 1, 0) # pgisced97==1 -> inadequately trained | pgisced==2 -> general elementary
soep_pool$lowskilled[is.na(soep_pool$isced)] <- NA
soep_pool$lowskilled[soep_pool$isced == 0] <- NA # in school
uk_pool$lowskilled <- ifelse(uk_pool$isced <= 2, 1, 0) # hiqual_dv==9 --> no qualification (0), 5=other (1), 4=gcse(2)
uk_pool$lowskilled[is.na(uk_pool$isced)] <- NA
prop.table(table(soep_pool$task3, soep_pool$lowskilled), 1)
prop.table(table(shp_pool$task3, shp_pool$lowskilled), 1)
prop.table(table(uk_pool$task3, uk_pool$lowskilled), 1)
table(uk_pool$isced)
prop.table(table(uk_pool$isced))

# Step 3: UK threshold lowered to <= 1 after inspecting the distribution.
uk_pool$lowskilled <- ifelse(uk_pool$isced <= 1, 1, 0) # hiqual_dv==9 --> no qualification (0), 5=other (1), 4=gcse(2)
uk_pool$lowskilled[is.na(uk_pool$isced)] <- NA
prop.table(table(uk_pool$task3, uk_pool$lowskilled), 1)
head(uk_pool)
head(shp_pool)
# NOTE(review): cross-tabulates task group against every distinct income
# value; the output can be very wide.
prop.table(table(uk_pool$task3, uk_pool$incomem), 1)
# Stack the three country subsets into one pooled data frame.
pool <- data.frame(rbind(shp_pool, soep_pool, uk_pool))

# Descriptives by country and task group: labour-force share of the task
# group, share female, share low skilled, median income.
# The first recorded version put the non-scalar nr_task / nr_total directly
# into summarise(); only the corrected form is kept. The share is constant
# within a group, so its mean is the share itself. n() replaces the helper
# column of ones, and TRUE replaces the reassignable T.
pool %>%
  dplyr::filter(!is.na(task3)) %>%
  group_by(cntry) %>%
  mutate(nr_total = n()) %>%
  ungroup() %>%
  group_by(cntry, task3) %>%
  mutate(nr_task = n(), share = nr_task / nr_total) %>%
  summarise(
    sharetask = mean(share, na.rm = TRUE),
    sharefemale = mean(female, na.rm = TRUE),
    sharelowskilled = mean(lowskilled, na.rm = TRUE),
    medianinc = median(incomem, na.rm = TRUE)
  )

# Cross-checks against the full data sets. `uk_full$task` (partial matching)
# and `sohp_full` (typo for shp_full) from the recorded session are replaced
# by the exact names.
prop.table(table(uk_full$task3))
prop.table(table(soep_full$task3))
prop.table(table(shp_full$task3))
prop.table(table(shp_full$task3, shp_full$female))
prop.table(table(shp_full$task3, shp_full$female), 1)
table(shp_full$task3, shp_full$female)
table(shp_full$female)
# NOTE(review): `isco` may be resolved by partial matching if shp_full has no
# column of exactly that name -- confirm.
table(shp_full$female[!is.na(shp_full$isco)])
# Task-group labels for the pooled data and successive refinements of the
# descriptive table (Table 1). Changes relative to the recorded session:
# TRUE instead of the reassignable T, n() instead of a helper column of ones,
# and table1 is ungrouped so later column renaming works on a plain tibble.

# Version 1: legend as character (groups sort alphabetically).
pool$legend <- NA
pool$legend[pool$task3 == 1] <- "Non-Routine Cognitive"
pool$legend[pool$task3 == 2] <- "Routine"
pool$legend[pool$task3 == 3] <- "Non-Routine Manual"
pool %>%
  dplyr::filter(!is.na(task3)) %>%
  group_by(cntry) %>%
  mutate(nr_total = n()) %>%
  ungroup() %>%
  group_by(cntry, legend) %>%
  mutate(nr_task = n(), share = nr_task / nr_total) %>%
  summarise(
    sharetask = mean(share, na.rm = TRUE),
    sharefemale = mean(female, na.rm = TRUE),
    sharelowskilled = mean(lowskilled, na.rm = TRUE),
    medianinc = median(incomem, na.rm = TRUE)
  )

# Version 2: legend as factor so groups appear in task-code order.
pool$legend <- NA
pool$legend[pool$task3 == 1] <- "Non-Routine Cognitive"
pool$legend[pool$task3 == 2] <- "Routine"
pool$legend[pool$task3 == 3] <- "Non-Routine Manual"
pool$legend <- factor(pool$legend, levels = c("Non-Routine Cognitive", "Routine", "Non-Routine Manual"))
pool %>%
  dplyr::filter(!is.na(task3)) %>%
  group_by(cntry) %>%
  mutate(nr_total = n()) %>%
  ungroup() %>%
  group_by(cntry, legend) %>%
  mutate(nr_task = n(), share = nr_task / nr_total) %>%
  summarise(
    sharetask = mean(share, na.rm = TRUE),
    sharefemale = mean(female, na.rm = TRUE),
    sharelowskilled = mean(lowskilled, na.rm = TRUE),
    medianinc = median(incomem, na.rm = TRUE)
  )

# Version 3: task share as a percentage with one decimal.
pool %>%
  dplyr::filter(!is.na(task3)) %>%
  group_by(cntry) %>%
  mutate(nr_total = n()) %>%
  ungroup() %>%
  group_by(cntry, legend) %>%
  mutate(nr_task = n(), share = nr_task / nr_total) %>%
  summarise(
    sharetask = round(mean(share, na.rm = TRUE) * 100, 1),
    sharefemale = mean(female, na.rm = TRUE),
    sharelowskilled = mean(lowskilled, na.rm = TRUE),
    medianinc = median(incomem, na.rm = TRUE)
  )

# Version 4: all three shares as percentages with one decimal.
pool %>%
  dplyr::filter(!is.na(task3)) %>%
  group_by(cntry) %>%
  mutate(nr_total = n()) %>%
  ungroup() %>%
  group_by(cntry, legend) %>%
  mutate(nr_task = n(), share = nr_task / nr_total) %>%
  summarise(
    sharetask = round(mean(share, na.rm = TRUE) * 100, 1),
    sharefemale = round(mean(female, na.rm = TRUE) * 100, 1),
    sharelowskilled = round(mean(lowskilled, na.rm = TRUE) * 100, 1),
    medianinc = median(incomem, na.rm = TRUE)
  )

# Store version 4 as table1. Column order: cntry, legend, sharetask,
# sharefemale, sharelowskilled, medianinc.
table1 <- pool %>%
  dplyr::filter(!is.na(task3)) %>%
  group_by(cntry) %>%
  mutate(nr_total = n()) %>%
  ungroup() %>%
  group_by(cntry, legend) %>%
  mutate(nr_task = n(), share = nr_task / nr_total) %>%
  summarise(
    sharetask = round(mean(share, na.rm = TRUE) * 100, 1),
    sharefemale = round(mean(female, na.rm = TRUE) * 100, 1),
    sharelowskilled = round(mean(lowskilled, na.rm = TRUE) * 100, 1),
    medianinc = median(incomem, na.rm = TRUE)
  ) %>%
  ungroup()
# Export Table 1 as LaTeX.
# Fixes relative to the recorded session:
#  * xtable is attached before its first use.
#  * `digits` must have ncol(table1) + 1 entries (row names come first);
#    the shorter vectors tried earlier are dropped.
#  * `type`, `include.rownames` and `file` are arguments of print(), not of
#    xtable(), where they were silently swallowed by `...`.
#  * col.names() does not exist; colnames() is used.
#  * The display labels now follow the actual column order
#    (sharefemale precedes sharelowskilled); the two were swapped before.
library(xtable)

table1 <- pool %>%
  dplyr::filter(!is.na(task3)) %>%
  group_by(cntry) %>%
  mutate(nr_total = n()) %>%
  ungroup() %>%
  group_by(cntry, legend) %>%
  mutate(nr_task = n(), share = nr_task / nr_total) %>%
  summarise(
    sharetask = round(mean(share, na.rm = TRUE) * 100, 1),
    sharefemale = round(mean(female, na.rm = TRUE) * 100, 1),
    sharelowskilled = round(mean(lowskilled, na.rm = TRUE) * 100, 1),
    medianinc = median(incomem, na.rm = TRUE)
  ) %>%
  ungroup() # plain tibble, so renaming columns cannot disturb grouping metadata
table1
class(table1)
colnames(table1)

# Display names, in the order cntry, legend, sharetask, sharefemale,
# sharelowskilled, medianinc.
colnames(table1) <- c(
  "Country", "Task Group", "Share of Labor Force",
  "Share Female", "Share Lowskilled", "Median Income"
)

# digits: row names, Country, Task Group, three shares (1 decimal), income.
print(
  xtable(table1, digits = c(0, 0, 0, 1, 1, 1, 0)),
  type = "latex",
  include.rownames = FALSE,
  file = paste0(outpath, "table1.tex")
)
# See ?xtable and ?print.xtable for the remaining formatting options.
# Yearly task-group shares per country, reshaped to long format and stacked
# into `all` for plotting. Several statements are repeated attempts; the last
# assignment of ch, de and uk is what enters `all`.

# Switzerland: row-wise shares (margin 1 = within year) of task3 by year.
ch <- shp_full %>% filter(!is.na(task3))
ch <- prop.table(table(ch$year, ch$task3), 1)
ch
# melt() turns the year x task table into long format; the plot that uses
# this data refers to its columns as Var1, Var2 and value.
ch <- melt(ch)
ch
ch$cntry <- "CH"
# UK, weighted attempt: weight is xweight1 before 2009 and 1 afterwards.
# NOTE(review): wtd.table() is not provided by any package visibly attached
# before this line (library(questionr) only appears later), so this call
# presumably failed; `uk` is recomputed unweighted below in any case.
uk <- uk_full %>% filter(!is.na(task3))
uk$xweight <- NA
uk$xweight <- ifelse(uk$year<2009, uk$xweight1, 1)
uk <- prop.table(wtd.table(uk$year, uk$task3, weights=uk$xweight), 1)
# Switzerland again, without the intermediate prints.
ch <- shp_full %>% filter(!is.na(task3))
ch <- prop.table(table(ch$year, ch$task3), 1)
ch <- melt(ch)
ch$cntry <- "CH"
# Germany.
de <- soep_full %>% filter(!is.na(task3))
de <- prop.table(table(de$year, de$task3), 1)
de <- melt(de)
de$cntry <- "DE"
# UK, unweighted.
uk <- uk_full %>% filter(!is.na(task3))
uk <- prop.table(table(uk$year, uk$task3), 1)
uk <- melt(uk)
uk$cntry <- "UK"
# NOTE(review): the name `all` masks base::all() for variable lookups in this
# session; function calls all(...) still resolve to the base function.
all <- rbind(ch, de, uk)
head(all)
# Stacked bar chart of task-group shares per year, one panel per country.
# Rows are sorted by descending task code before plotting.
# Options tried and left disabled in the original chain:
#   geom_text(aes(y = value, label = percent(value), fill = factor(Var2), ymax=0), size = 4, position = "fill")
#   scale_y_continuous(name="", labels = percent)
#   coord_flip()
ggplot(
  arrange(all, desc(Var2)),
  aes(x = Var1, y = value, fill = factor(Var2))
) +
  geom_col(position = "fill") +
  labs(x = "Year", y = "Proportion") +
  facet_wrap(~ cntry) +
  scale_fill_grey(
    name = "Task Group",
    labels = c("non-routine cognitive", "routine", "non-routine manual")
  ) +
  scale_x_continuous(breaks = seq(1984, 2014, 10)) +
  theme_bw() +
  theme(
    plot.background = element_blank(),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.title = element_text(size = 12),
    legend.text = element_text(size = 12)
  )

# Look at the weight columns of the German and UK data.
soep_full$xweight1
uk_full$xweight
# directories
path <- "~/Dropbox/projects/transitionpattern/submission/5_CPS/submission/CPS_revision/data/"
outpath <- "~/Dropbox/projects/transitionpattern/submission/5_CPS/submission/CPS_revision/out/"

# Reload the three panel extracts, keeping labelled variables as raw codes.
shp_full <- read.dta13(paste0(path, "shp_CPS.dta"), convert.factors = FALSE)
soep_full <- read.dta13(paste0(path, "soep_CPS.dta"), convert.factors = FALSE)
uk_full <- read.dta13(paste0(path, "bhps_CPS.dta"), convert.factors = FALSE)

# Give all three data sets the same column names: the UK education variable
# is copied into isced, and the Swiss and UK data get an all-NA xweight1.
uk_full$isced <- uk_full$edu
shp_full$xweight1 <- NA
uk_full$xweight1 <- NA

# Per-country subsets with the common columns, now including the weight.
shp_pool <- dplyr::select(shp_full, id, task3, isced, female, incomem, age, xweight1)
soep_pool <- dplyr::select(soep_full, id, task3, isced, female, incomem, age, xweight1)
uk_pool <- dplyr::select(uk_full, id, task3, isced, female, incomem, age, xweight1)
head(shp_pool)
head(soep_pool)
# update:
# Preparation
# The recorded session called library(questionr) before the package was
# installed and then re-ran install.packages() several times. The steps are
# consolidated here: clear the session, install questionr only when it is
# missing, then attach it once.
# NOTE(review): rm(list = ls()) wipes the workspace, including the data
# loaded above; kept because the original session did the same.
rm(list = ls())
# dev.off() errors when no graphics device is open, hence the guard.
if (!is.null(dev.list())) dev.off()
cat("\014") # form-feed character; clears the console in RStudio
# globals
options(scipen = 999)
# questionr provides wtd.table() (weighted cross-tabulation); see ?wtd.table.
if (!requireNamespace("questionr", quietly = TRUE)) install.packages("questionr")
library(questionr)
